Tensorflow RNN¶
In [ ]:
!pip install note_seq
In [ ]:
import numpy as np
import tensorflow as tf
import keras
from keras import layers
import os
import zipfile
import random
from sklearn.model_selection import train_test_split
from music21 import converter, instrument, note, chord, stream
import tensorflow as tf
from google.protobuf import text_format
# from note_seq.protobuf import music_pb2
from sklearn.model_selection import train_test_split
# import tensorflow as andrew
In [ ]:
!wget "https://storage.googleapis.com/magentadata/datasets/bach-doodle/bach-doodle.jsonl-00000-of-00192.gz"
In [ ]:
###################
# Data Extraction #
###################
In [ ]:
file_count = 100000
count = 0
input_path = "bach-doodle.jsonl-00000-of-00192.gz"
dataset = []
import json
import gzip
with gzip.open(input_path, 'rb') as f_in:
file_content = f_in.read().decode('utf-8')
# Split the content into lines and iterate
for line in file_content.splitlines():
count += 1
record = json.loads(line.strip())
dataset.append(record)
if count >= file_count:
break
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
In [ ]:
################################################################################
# Task1: Symbolic, unconditioned generation #
################################################################################
In [ ]:
!apt update && apt install fluidsynth -y
!brew install wget
!wget https://raw.githubusercontent.com/musescore/MuseScore/master/share/sound/FluidR3Mono_GM.sf3
!pip install midi2audio
!pip install IPython
!pip install midiutil
In [ ]:
##### Baseline: Markov Chain #####
from collections import defaultdict
import random
from numpy.random import choice
import numpy as np
import midi2audio
a = open("chords.json")
dataset = []
for l in a.readlines():
d = eval(l)
dataset.append(d)
flatDataset = []
for d in dataset:
flat = []
for part in d['chords']:
for bar in part:
flat += bar
flatDataset.append(flat)
unigrams = defaultdict(int)
bigrams = defaultdict(int)
for d in flatDataset:
for chord in d:
unigrams[chord] += 1
for (chord1,chord2) in zip(d[:-1],d[1:]):
bigrams[(chord1,chord2)] += 1
unigramCounts = [(unigrams[k],k) for k in unigrams]
bigramCounts = [(bigrams[k],k) for k in bigrams]
unigramCounts.sort()
bigramCounts.sort()
dictionary = set(flatDataset[3])
transitions = defaultdict(list)
transitionProbabilities = defaultdict(list)
for b1,b2 in bigrams:
if b1 in dictionary and b2 in dictionary:
transitions[b1].append(b2)
transitionProbabilities[b1].append(bigrams[(b1,b2)])
def sample(length):
seq = [random.choice(list(transitionProbabilities.keys()))]
while len(seq) < length:
probs = np.array(transitionProbabilities[seq[-1]])
if not np.isclose(probs.sum(), 1.0):
probs = probs / probs.sum()
nextchord = choice(transitions[seq[-1]], 1, p=probs)
seq.append(nextchord.item())
return seq
In [ ]:
##### Generating Output Files #####
KEY_TO_IDX = {
'C': 0,
'C#': 1,
'Db': 1,
'D': 2,
'D#': 3,
'Eb': 3,
'E': 4,
'F': 5,
'F#': 6,
'Gb': 6,
'G': 7,
'G#': 8,
'Ab': 8,
'A': 9,
'A#': 10,
'Bb': 10,
'B': 11,
'Cb': 11,
}
# cover some of the qualities
QUALITY_TO_INTERVAL = {
# 1 2 3 4 5 6 7
'': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0], # major
'-': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], # minor
'+': [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0], # augmented
'o': [1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0], # diminished
'sus': [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0], # suspended
'7': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0], # dominant 7th
'7alt': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0], # dominant 7th
'j7': [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1], # major 7th
'-7': [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0], # minor 7th
'o7': [1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0], # diminished 7th
'm7b5': [1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0], # half-diminished
'6': [1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0], # major 6th
'-6': [1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0], # minor 6th
}
def chord_to_notes(chord):
if len(chord) == 1 or chord[1] not in ['b','#']:
root = chord[:1]
quality = chord[1:]
else:
root = chord[:2]
quality = chord[2:]
bass = root
root_c = 60
bass_c = 36
root_pc = KEY_TO_IDX[root]
if quality not in QUALITY_TO_INTERVAL:
raise ValueError('undefined chord quality {}'.format(quality))
chord_map = list(np.where(np.array(QUALITY_TO_INTERVAL[quality]) == 1)[0])
bass_pc = KEY_TO_IDX[bass]
return [bass_c + bass_pc] + [root_c + root_pc + i for i in chord_map]
from midiutil import MIDIFile
midi = MIDIFile(1) # Create a MIDI file that consists of 1 track
track = 0 # Set track number
time = 0 # Where is the event placed (at the beginning)
tempo = 120 # The tempo (beats per minute)
midi.addTempo(track, time, tempo) # Add tempo information
baseline1_chords = sample(10)
current_time = 0
default_duration = 4 # one beat
for chord in baseline1_chords:
notes = chord_to_notes(chord)
print(notes)
for pitch in notes:
midi.addNote(track, 0, pitch, current_time, default_duration, 100)
current_time += default_duration
with open("chord_sample.mid", "wb") as f:
midi.writeFile(f) # write MIDI file
from midi2audio import FluidSynth # Import library
from IPython.display import Audio, display
fs = FluidSynth("FluidR3Mono_GM.sf3") # Initialize FluidSynth
# for i in range(len(predictions)):
fs.midi_to_audio("chord_sample.mid", "chord_sample.wav")
display(Audio("chord_sample.wav"))
[45, np.int64(69), np.int64(72), np.int64(75), np.int64(78)] [39, np.int64(63), np.int64(67), np.int64(70), np.int64(72)] [36, np.int64(60), np.int64(64), np.int64(67), np.int64(70)] [36, np.int64(60), np.int64(64), np.int64(67), np.int64(70)] [36, np.int64(60), np.int64(64), np.int64(67), np.int64(70)] [41, np.int64(65), np.int64(69), np.int64(72), np.int64(75)] [41, np.int64(65), np.int64(69), np.int64(72), np.int64(75)] [41, np.int64(65), np.int64(69), np.int64(72), np.int64(75)] [46, np.int64(70), np.int64(74), np.int64(77), np.int64(80)] [46, np.int64(70), np.int64(74), np.int64(77), np.int64(80)]
In [ ]:
########### Our Model ############
In [ ]:
'''
Preparing and loading the data for the training and test set
'''
sequence_length = 20
#Get a random sample of the data dataset
random.seed(0)
subset_size = 1000
subset_data = random.sample(train_data, min(subset_size, len(train_data)))
sequences = []
for record in train_data:
#Only train on datapoints that produced good harmonies
# e.g. when feedback = 2
if record.get('feedback', [None])[0] == '2':
output_seq = record.get('output_sequence', [])
sequence = []
for timestep in output_seq:
notes = timestep.get('notes', [])
if notes:
chord = [note['pitch'] for note in notes]
sequence.append(chord)
if sequence:
sequences.append(sequence)
print(f"Number of sequences with feedback=2: {len(sequences)}")
# Create vocabulary
all_pitches = set(p for seq in sequences for chord in seq for p in chord)
pitch2idx = {p: i for i, p in enumerate(sorted(all_pitches))}
idx2pitch = {i: p for p, i in pitch2idx.items()}
vocab_size = len(pitch2idx)
#Group pitches of notes together into chords
#We change the pitches into a binary one-hot encoding for better multi-categorization
sequence_vectors = []
for chord_sequence in sequences:
if len(chord_sequence) > sequence_length:
binary_seq = []
for chord in chord_sequence:
chord_vec = np.zeros(vocab_size)
for pitch in chord:
if pitch in pitch2idx:
chord_vec[pitch2idx[pitch]] = 1.0
binary_seq.append(chord_vec)
sequence_vectors.append(binary_seq)
# Create training data windows based on chords
X, Y = [], []
for seq in sequence_vectors:
for i in range(len(seq) - sequence_length):
X.append(seq[i:i+sequence_length])
Y.append(seq[i+sequence_length])
X = np.array(X, dtype=np.float32)
Y = np.array(Y, dtype=np.float32)
print(f"X shape: {X.shape}, Y shape: {Y.shape}")
Number of sequences with feedback=2: 26446 X shape: (474, 20, 46), Y shape: (474, 46)
In [ ]:
'''
Creating an LSTM model (a more specialized version of an RNN with better
sequential data processing)
We use a bidirection model to consider the past and future chords in the sequence
in hopes to produce more accurate results
'''
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.metrics import BinaryAccuracy
model = Sequential([
Bidirectional(LSTM(256, return_sequences=True, dropout=0.3, recurrent_dropout=0.3), input_shape=(sequence_length, vocab_size)),
Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2)),
Dense(vocab_size, activation='sigmoid', kernel_regularizer=l2(0.001))
])
model.compile(optimizer=Adam(learning_rate=0.01), loss='binary_crossentropy', metrics=[BinaryAccuracy()])
model.summary()
/usr/local/lib/python3.11/dist-packages/keras/src/layers/rnn/bidirectional.py:107: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ bidirectional (Bidirectional) │ (None, 20, 512) │ 620,544 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ bidirectional_1 (Bidirectional) │ (None, 256) │ 656,384 │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ (None, 46) │ 11,822 │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 1,288,750 (4.92 MB)
Trainable params: 1,288,750 (4.92 MB)
Non-trainable params: 0 (0.00 B)
In [ ]:
history = model.fit(X, Y, epochs=10, batch_size=32, validation_split=0.2)
# Calculate perplexity using validation loss
final_val_loss = history.history['val_loss'][-1]
val_perplexity = np.exp(final_val_loss)
print(f"Final Validation Loss: {final_val_loss:.4f}")
print(f"Perplexity: {val_perplexity:.4f}")
Epoch 1/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 20s 380ms/step - binary_accuracy: 0.6877 - loss: 0.6349 - val_binary_accuracy: 0.7625 - val_loss: 0.5310 Epoch 2/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 6s 226ms/step - binary_accuracy: 0.7625 - loss: 0.5290 - val_binary_accuracy: 0.7721 - val_loss: 0.4982 Epoch 3/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 224ms/step - binary_accuracy: 0.7719 - loss: 0.4945 - val_binary_accuracy: 0.7645 - val_loss: 0.4819 Epoch 4/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 223ms/step - binary_accuracy: 0.7766 - loss: 0.4744 - val_binary_accuracy: 0.7810 - val_loss: 0.4561 Epoch 5/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 227ms/step - binary_accuracy: 0.7900 - loss: 0.4597 - val_binary_accuracy: 0.7952 - val_loss: 0.4488 Epoch 6/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 3s 259ms/step - binary_accuracy: 0.8057 - loss: 0.4452 - val_binary_accuracy: 0.7998 - val_loss: 0.4393 Epoch 7/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 228ms/step - binary_accuracy: 0.8127 - loss: 0.4298 - val_binary_accuracy: 0.8190 - val_loss: 0.4281 Epoch 8/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 240ms/step - binary_accuracy: 0.8207 - loss: 0.4248 - val_binary_accuracy: 0.8229 - val_loss: 0.4198 Epoch 9/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 255ms/step - binary_accuracy: 0.8213 - loss: 0.4234 - val_binary_accuracy: 0.8149 - val_loss: 0.4179 Epoch 10/10 12/12 ━━━━━━━━━━━━━━━━━━━━ 5s 277ms/step - binary_accuracy: 0.8230 - loss: 0.4221 - val_binary_accuracy: 0.8192 - val_loss: 0.4214 Final Validation Loss: 0.4214 Perplexity: 1.5241
In [ ]:
import matplotlib.pyplot as plt
val_losses = history.history['val_loss']
val_perplexities = [np.exp(loss) for loss in val_losses]
#Plot perplexity
plt.plot(val_perplexities)
plt.xlabel("Epoch")
plt.ylabel("Validation Perplexity")
plt.title("Perplexity over Epochs")
plt.grid(True)
plt.show()
loss = history.history['loss']
val_loss = history.history['val_loss']
acc = history.history['binary_accuracy']
val_acc = history.history['val_binary_accuracy']
epochs = range(1, len(loss) + 1)
plt.figure(figsize=(12, 5))
#Plot Loss
plt.subplot(1, 2, 1)
plt.plot(epochs, loss, 'b-', label='Training Loss')
plt.plot(epochs, val_loss, 'r--', label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
#Plot Accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs, acc, 'b-', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r--', label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Binary Accuracy')
plt.legend()
plt.show()
In [ ]:
!wget https://raw.githubusercontent.com/musescore/MuseScore/master/share/sound/FluidR3Mono_GM.sf3
!pip install midi2audio
!pip install IPython
!pip install miditoolkit
!apt-get update
!apt-get install -y fluidsynth
In [ ]:
import miditoolkit
import subprocess
from IPython.display import Audio
In [ ]:
#Functions to generate the music piece
#Sample a binary chord vector using Bernoulli sampling with temperature scaling.
def sample_chord(probabilities, temperature=1.0):
probs = np.clip(probabilities, 1e-8, 1 - 1e-8)
logits = np.log(probs / (1 - probs)) / temperature
scaled_probs = 1 / (1 + np.exp(-logits))
return np.random.binomial(1, scaled_probs).astype(np.float32)
#Sample a chord from the top-k most probable pitches with Bernoulli sampling.
def top_k_binary_chord(probabilities, k=8, temperature=1.0):
#Apply temperature
probs = np.clip(probabilities, 1e-8, 1 - 1e-8)
logits = np.log(probs / (1 - probs)) / temperature
scaled_probs = 1 / (1 + np.exp(-logits))
#Get top-k indices
top_k_indices = np.argpartition(scaled_probs, -k)[-k:]
top_k_probs = scaled_probs[top_k_indices]
top_k_probs /= np.sum(top_k_probs)
#Bernoulli sampling from top-k
binary_chord = np.zeros_like(probabilities)
sampled = np.random.binomial(1, top_k_probs)
for i, s in zip(top_k_indices, sampled):
binary_chord[i] = s
if binary_chord.sum() == 0:
binary_chord[top_k_indices[np.argmax(top_k_probs)]] = 1
return binary_chord.astype(np.float32)
#Use LSTM to generate a new chord sequence
def generate_chord_sequence(seed_seq, length=50, temperature=1.0):
generated = list(seed_seq)
for _ in range(length):
input_seq = np.array(generated[-sequence_length:]).reshape(1, sequence_length, vocab_size)
pred = model.predict(input_seq, verbose=0)[0]
chord = top_k_binary_chord(pred, k=8, temperature=temperature)
if chord.sum() == 0:
chord[np.argmax(pred)] = 1
# Prevent exact repetition to get more varied results
if len(generated) > 0 and np.array_equal(chord, generated[-1]):
idx = np.random.randint(vocab_size)
chord[idx] = 1 - chord[idx]
generated.append(chord)
return generated
In [ ]:
#Functions to play the music pieces from midi file
#Convert binary vector to list of MIDI pitches since we did one-hot encoding before
def binary_chord_to_pitches(binary_chord, idx2pitch):
return [idx2pitch[i] for i, val in enumerate(binary_chord) if val > 0]
#Save the chords into a midi file
def save_chords_to_midi(chords, filename="generated.mid", velocity=80, duration=480):
midi_obj = miditoolkit.MidiFile()
instrument = miditoolkit.Instrument(program=0, is_drum=False, name="Generated")
time = 0
for chord in chords:
for pitch in chord:
note = miditoolkit.Note(
velocity=velocity,
pitch=int(pitch),
start=time,
end=time + duration
)
instrument.notes.append(note)
time += duration
midi_obj.instruments.append(instrument)
midi_obj.tempo_changes.append(miditoolkit.TempoChange(120, time=0)) # 120 BPM
midi_obj.dump(filename)
print(f"MIDI saved to {filename}")
return filename
#Save the chords into a midi file
#But we changed the timing and velocity up with randomization to produce less
#robotics results. We wanted to see if we could get something more natural sounding
def save_chords_to_midi_varied(chords, filename="generated.mid", base_velocity=80, base_duration=480):
midi_obj = miditoolkit.MidiFile()
instrument = miditoolkit.Instrument(program=0, is_drum=False, name="Generated")
time = 0
for chord in chords:
# Randomize velocity and duration per chord
velocity_variation = random.randint(-10, 10)
duration_variation = random.randint(-60, 40)
velocity = np.clip(base_velocity + velocity_variation, 40, 127)
duration = max(120, base_duration + duration_variation)
time_offset = random.randint(-10, 10)
for pitch in chord:
note = miditoolkit.Note(
velocity=velocity,
pitch=int(pitch),
start=max(0, time + time_offset),
end=max(0, time + time_offset + duration)
)
instrument.notes.append(note)
time += base_duration
midi_obj.instruments.append(instrument)
midi_obj.tempo_changes.append(miditoolkit.TempoChange(120, time=0))
midi_obj.dump(filename)
print(f"MIDI saved to {filename}")
return filename
#Transform the midi file into a wave file with FluidSynth so we can play the music
def midi_to_wav(midi_path, wav_path="output.wav", soundfont="FluidR3_GM.sf2"):
subprocess.run(["fluidsynth", "-ni", soundfont, midi_path, "-F", wav_path, "-r", "44100"])
print(f"WAV saved to {wav_path}")
return wav_path
#Generate a new music output and diplay using IPyhton
def generate_and_play_midi(generated_binary_chords, idx2pitch,
midi_filename="generated.mid",
wav_filename="output.wav",
soundfont="FluidR3Mono_GM.sf2",
convert_to_wav=True):
#Convert binary vectors to pitch lists
pitch_chords = [binary_chord_to_pitches(chord, idx2pitch) for chord in generated_binary_chords]
midi_file = save_chords_to_midi(pitch_chords, filename=midi_filename)
if convert_to_wav:
wav_file = midi_to_wav(midi_file, wav_path=wav_filename, soundfont=soundfont)
display(Audio(wav_file))
else:
print("Error in WAV conversion and playback.")
#Generate a new music output and diplay using IPyhton with varied velocity and duration of notes
def generate_and_play_midi_varied(generated_binary_chords, idx2pitch,
midi_filename="generated.mid",
wav_filename="output.wav",
soundfont="FluidR3Mono_GM.sf2",
convert_to_wav=True):
#Convert binary vectors to pitch lists
pitch_chords = [binary_chord_to_pitches(chord, idx2pitch) for chord in generated_binary_chords]
midi_file = save_chords_to_midi_varied(pitch_chords, filename=midi_filename)
if convert_to_wav:
wav_file = midi_to_wav(midi_file, wav_path=wav_filename, soundfont=soundfont)
display(Audio(wav_file))
else:
print("Error in WAV conversion and playback.")
#We give the model a seed of zero binary arrays to start the generation process
#In the final output, we remove the random seed noise at the beginning to just
#get the unconditioned generated output
seed = [np.zeros(vocab_size, dtype=np.float32) for _ in range(sequence_length)]
generated = generate_chord_sequence(seed, length=100, temperature=1.0)
generate_and_play_midi(generated[sequence_length:], idx2pitch)
MIDI saved to generated.mid WAV saved to output.wav